{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2017.7.17\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pandas import Series, DataFrame\n",
    "from sklearn.model_selection import train_test_split\n",
    "import xgboost as xgb\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the train and test tables from CSV files under feature/ (paths are relative to the working directory).\n",
    "train_data_set = pd.read_csv(\"feature/xgb_train_data.csv\")\n",
    "test_data_set = pd.read_csv(\"feature/xgb_test_data.csv\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Stack the training and test data into a single frame so both can be processed together.\n",
    "# Test rows are tagged with the sentinel flag = -999 before stacking.\n",
    "test_data_set['flag'] = -999\n",
    "train_test_data = pd.concat(\n",
    "    objs=[train_data_set, test_data_set],\n",
    "    axis=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(48842, 10)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the combined train + test frame.\n",
    "train_test_data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>education_num</th>\n",
       "      <th>flag</th>\n",
       "      <th>hours_per_week</th>\n",
       "      <th>income</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>race</th>\n",
       "      <th>relationship</th>\n",
       "      <th>sex</th>\n",
       "      <th>workclass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39</td>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53</td>\n",
       "      <td>7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28</td>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  education_num  flag  hours_per_week  income  marital_status  race  \\\n",
       "0   39             13   NaN               1       0               2     1   \n",
       "1   50             13   NaN               0       0               1     1   \n",
       "2   38              9   NaN               1       0               3     1   \n",
       "3   53              7   NaN               1       0               1     2   \n",
       "4   28             13   NaN               1       0               1     2   \n",
       "\n",
       "   relationship  sex  workclass  \n",
       "0             2    1          4  \n",
       "1             1    1          2  \n",
       "2             2    1          1  \n",
       "3             1    1          1  \n",
       "4             5    0          1  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_test_data.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Merge the training and test data so both receive the same dummy columns.\n",
    "# Test rows are tagged with flag = -999; the training rows carry no such value\n",
    "# (they show NaN in `flag` after the concat), so `flag == -999` identifies the test rows.\n",
    "test_data_set['flag'] = -999\n",
    "# ignore_index=True gives every stacked row a unique label. Both CSVs are read with a\n",
    "# default index starting at 0, and duplicate labels make the column-wise concat below fragile.\n",
    "train_test_data = pd.concat([train_data_set, test_data_set], axis=0, ignore_index=True)\n",
    "\n",
    "category_val = [\"education_num\", \"marital_status\", \"relationship\", \"workclass\"]\n",
    "for val in category_val:\n",
    "    val_dummies = pd.get_dummies(train_test_data[val])\n",
    "    # Dummy columns are named <feature>_<position>, not <feature>_<category value>.\n",
    "    val_dummies.columns = [val + '_' + str(i) for i in range(val_dummies.shape[1])]\n",
    "    # Swap the original column for its dummies without mutating the frame in place.\n",
    "    train_test_data = pd.concat([train_test_data.drop(val, axis=1), val_dummies], axis=1)\n",
    "\n",
    "# Split back into train / test. Filtering and dropping `flag` in one expression returns a\n",
    "# new frame, which avoids the SettingWithCopyWarning raised by an in-place drop on a slice.\n",
    "is_test_row = train_test_data['flag'] == -999\n",
    "train_data = train_test_data[~is_test_row].drop(\"flag\", axis=1)\n",
    "test_data = train_test_data[is_test_row].drop(\"flag\", axis=1)\n",
    "\n",
    "# index=None keeps the row labels out of the written files.\n",
    "train_data.to_csv(\"feature/feature_train.csv\", index=None)\n",
    "test_data.to_csv(\"feature/feature_test.csv\", index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(32561, 43)\n",
      "(16281, 43)\n"
     ]
    }
   ],
   "source": [
    "# Print (rows, columns) of the train split, then of the test split, one per line.\n",
    "print(train_data.shape, test_data.shape, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>hours_per_week</th>\n",
       "      <th>income</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>education_num_0</th>\n",
       "      <th>education_num_1</th>\n",
       "      <th>education_num_2</th>\n",
       "      <th>education_num_3</th>\n",
       "      <th>education_num_4</th>\n",
       "      <th>...</th>\n",
       "      <th>relationship_5</th>\n",
       "      <th>workclass_0</th>\n",
       "      <th>workclass_1</th>\n",
       "      <th>workclass_2</th>\n",
       "      <th>workclass_3</th>\n",
       "      <th>workclass_4</th>\n",
       "      <th>workclass_5</th>\n",
       "      <th>workclass_6</th>\n",
       "      <th>workclass_7</th>\n",
       "      <th>workclass_8</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 43 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  hours_per_week  income  race  sex  education_num_0  education_num_1  \\\n",
       "0   39               1       0     1    1              0.0              0.0   \n",
       "1   50               0       0     1    1              0.0              0.0   \n",
       "2   38               1       0     1    1              0.0              0.0   \n",
       "\n",
       "   education_num_2  education_num_3  education_num_4     ...       \\\n",
       "0              0.0              0.0              0.0     ...        \n",
       "1              0.0              0.0              0.0     ...        \n",
       "2              0.0              0.0              0.0     ...        \n",
       "\n",
       "   relationship_5  workclass_0  workclass_1  workclass_2  workclass_3  \\\n",
       "0             0.0          0.0          0.0          0.0          1.0   \n",
       "1             0.0          0.0          1.0          0.0          0.0   \n",
       "2             0.0          1.0          0.0          0.0          0.0   \n",
       "\n",
       "   workclass_4  workclass_5  workclass_6  workclass_7  workclass_8  \n",
       "0          0.0          0.0          0.0          0.0          0.0  \n",
       "1          0.0          0.0          0.0          0.0          0.0  \n",
       "2          0.0          0.0          0.0          0.0          0.0  \n",
       "\n",
       "[3 rows x 43 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Reload the engineered feature files and separate the \"income\" label from the predictors.\n",
    "model_train_data = pd.read_csv(\"feature/feature_train.csv\")\n",
    "model_test_data = pd.read_csv(\"feature/feature_test.csv\")\n",
    "\n",
    "# Predictors are every column except the label; the label is kept as its own Series.\n",
    "train_x = model_train_data.drop(\"income\", axis=1)\n",
    "train_label = model_train_data[\"income\"]\n",
    "\n",
    "test_x = model_test_data.drop(\"income\", axis=1)\n",
    "test_label = model_test_data[\"income\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "offset is  22793\n"
     ]
    }
   ],
   "source": [
    "# Hold out the last 30% of the training rows, in file order (no shuffling), for validation.\n",
    "offset = round(0.7 * len(train_x))\n",
    "print(\"offset is \", str(offset))\n",
    "\n",
    "# The first `offset` rows fit the model; the remaining rows validate it.\n",
    "dtrain = xgb.DMatrix(train_x.iloc[:offset], label=train_label.iloc[:offset])\n",
    "dval = xgb.DMatrix(train_x.iloc[offset:], label=train_label.iloc[offset:])\n",
    "\n",
    "# Unlabelled matrix holding the test features.\n",
    "dtest = xgb.DMatrix(test_x)\n",
    "\n",
    "# Evaluation sets, each paired with the name used for it in the training log.\n",
    "watchlist = [(dtrain, 'train'), (dval, 'val')]\n",
    "\n",
    "# Booster settings: binary logistic objective scored by AUC.\n",
    "params = {\n",
    "    'booster': 'gbtree',\n",
    "    'objective': 'binary:logistic',\n",
    "    'eta': 0.001,\n",
    "    'lambda': 20,\n",
    "    'gamma': 3,\n",
    "    'eval_metric': 'auc',\n",
    "    'max_depth': 6,\n",
    "    'subsample': 0.6,\n",
    "    'colsample_bytree': 0.6,\n",
    "    'min_child_weight': 3,\n",
    "    'seed': 1024,\n",
    "    'nthread': 4,\n",
    "}\n",
    "\n",
    "# Upper bound on the number of boosting rounds.\n",
    "num_round = 200"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\ttrain-auc:0.823864\tval-auc:0.820961\n",
      "Multiple eval metrics have been passed: 'val-auc' will be used for early stopping.\n",
      "\n",
      "Will train until val-auc hasn't improved in 100 rounds.\n",
      "[1]\ttrain-auc:0.835908\tval-auc:0.833029\n",
      "[2]\ttrain-auc:0.843178\tval-auc:0.841846\n",
      "[3]\ttrain-auc:0.851532\tval-auc:0.851493\n",
      "[4]\ttrain-auc:0.852079\tval-auc:0.851957\n",
      "[5]\ttrain-auc:0.851119\tval-auc:0.850815\n",
      "[6]\ttrain-auc:0.859819\tval-auc:0.859022\n",
      "[7]\ttrain-auc:0.860855\tval-auc:0.860615\n",
      "[8]\ttrain-auc:0.860033\tval-auc:0.859264\n",
      "[9]\ttrain-auc:0.86115\tval-auc:0.860812\n",
      "[10]\ttrain-auc:0.860859\tval-auc:0.861008\n",
      "[11]\ttrain-auc:0.860965\tval-auc:0.861369\n",
      "[12]\ttrain-auc:0.86165\tval-auc:0.861788\n",
      "[13]\ttrain-auc:0.861911\tval-auc:0.861801\n",
      "[14]\ttrain-auc:0.863522\tval-auc:0.863731\n",
      "[15]\ttrain-auc:0.863633\tval-auc:0.863807\n",
      "[16]\ttrain-auc:0.863731\tval-auc:0.864207\n",
      "[17]\ttrain-auc:0.863886\tval-auc:0.864502\n",
      "[18]\ttrain-auc:0.863933\tval-auc:0.864451\n",
      "[19]\ttrain-auc:0.863774\tval-auc:0.864157\n",
      "[20]\ttrain-auc:0.863481\tval-auc:0.863902\n",
      "[21]\ttrain-auc:0.863589\tval-auc:0.864176\n",
      "[22]\ttrain-auc:0.86389\tval-auc:0.864312\n",
      "[23]\ttrain-auc:0.863306\tval-auc:0.864433\n",
      "[24]\ttrain-auc:0.863138\tval-auc:0.864454\n",
      "[25]\ttrain-auc:0.863365\tval-auc:0.864791\n",
      "[26]\ttrain-auc:0.863469\tval-auc:0.864831\n",
      "[27]\ttrain-auc:0.86324\tval-auc:0.86418\n",
      "[28]\ttrain-auc:0.863101\tval-auc:0.863828\n",
      "[29]\ttrain-auc:0.86312\tval-auc:0.86437\n",
      "[30]\ttrain-auc:0.862958\tval-auc:0.864001\n",
      "[31]\ttrain-auc:0.863001\tval-auc:0.86406\n",
      "[32]\ttrain-auc:0.862997\tval-auc:0.863942\n",
      "[33]\ttrain-auc:0.86481\tval-auc:0.865336\n",
      "[34]\ttrain-auc:0.864504\tval-auc:0.865094\n",
      "[35]\ttrain-auc:0.864528\tval-auc:0.865007\n",
      "[36]\ttrain-auc:0.864584\tval-auc:0.864807\n",
      "[37]\ttrain-auc:0.864261\tval-auc:0.864573\n",
      "[38]\ttrain-auc:0.864445\tval-auc:0.865089\n",
      "[39]\ttrain-auc:0.864348\tval-auc:0.86479\n",
      "[40]\ttrain-auc:0.863857\tval-auc:0.864457\n",
      "[41]\ttrain-auc:0.863995\tval-auc:0.864295\n",
      "[42]\ttrain-auc:0.864301\tval-auc:0.864595\n",
      "[43]\ttrain-auc:0.86441\tval-auc:0.864614\n",
      "[44]\ttrain-auc:0.864142\tval-auc:0.864253\n",
      "[45]\ttrain-auc:0.8644\tval-auc:0.864585\n",
      "[46]\ttrain-auc:0.863724\tval-auc:0.86391\n",
      "[47]\ttrain-auc:0.863649\tval-auc:0.86388\n",
      "[48]\ttrain-auc:0.863567\tval-auc:0.863792\n",
      "[49]\ttrain-auc:0.863535\tval-auc:0.863734\n",
      "[50]\ttrain-auc:0.86365\tval-auc:0.86388\n",
      "[51]\ttrain-auc:0.863558\tval-auc:0.863779\n",
      "[52]\ttrain-auc:0.863831\tval-auc:0.864221\n",
      "[53]\ttrain-auc:0.86388\tval-auc:0.864333\n",
      "[54]\ttrain-auc:0.863537\tval-auc:0.864037\n",
      "[55]\ttrain-auc:0.863429\tval-auc:0.864006\n",
      "[56]\ttrain-auc:0.863303\tval-auc:0.86404\n",
      "[57]\ttrain-auc:0.86333\tval-auc:0.864148\n",
      "[58]\ttrain-auc:0.863411\tval-auc:0.864166\n",
      "[59]\ttrain-auc:0.863266\tval-auc:0.863977\n",
      "[60]\ttrain-auc:0.863396\tval-auc:0.86414\n",
      "[61]\ttrain-auc:0.863392\tval-auc:0.864138\n",
      "[62]\ttrain-auc:0.863335\tval-auc:0.864015\n",
      "[63]\ttrain-auc:0.863438\tval-auc:0.863919\n",
      "[64]\ttrain-auc:0.863326\tval-auc:0.863824\n",
      "[65]\ttrain-auc:0.863273\tval-auc:0.863876\n",
      "[66]\ttrain-auc:0.863141\tval-auc:0.863804\n",
      "[67]\ttrain-auc:0.863088\tval-auc:0.8638\n",
      "[68]\ttrain-auc:0.863287\tval-auc:0.864041\n",
      "[69]\ttrain-auc:0.863356\tval-auc:0.864201\n",
      "[70]\ttrain-auc:0.863332\tval-auc:0.864222\n",
      "[71]\ttrain-auc:0.863203\tval-auc:0.864092\n",
      "[72]\ttrain-auc:0.863075\tval-auc:0.864015\n",
      "[73]\ttrain-auc:0.863156\tval-auc:0.864082\n",
      "[74]\ttrain-auc:0.86319\tval-auc:0.864162\n",
      "[75]\ttrain-auc:0.863236\tval-auc:0.864119\n",
      "[76]\ttrain-auc:0.863306\tval-auc:0.864195\n",
      "[77]\ttrain-auc:0.863407\tval-auc:0.864248\n",
      "[78]\ttrain-auc:0.863449\tval-auc:0.864425\n",
      "[79]\ttrain-auc:0.863371\tval-auc:0.864333\n",
      "[80]\ttrain-auc:0.863423\tval-auc:0.864391\n",
      "[81]\ttrain-auc:0.863281\tval-auc:0.864325\n",
      "[82]\ttrain-auc:0.863326\tval-auc:0.864385\n",
      "[83]\ttrain-auc:0.86332\tval-auc:0.864422\n",
      "[84]\ttrain-auc:0.863363\tval-auc:0.864518\n",
      "[85]\ttrain-auc:0.863313\tval-auc:0.864493\n",
      "[86]\ttrain-auc:0.863293\tval-auc:0.864514\n",
      "[87]\ttrain-auc:0.863515\tval-auc:0.864674\n",
      "[88]\ttrain-auc:0.863553\tval-auc:0.864721\n",
      "[89]\ttrain-auc:0.863691\tval-auc:0.864779\n",
      "[90]\ttrain-auc:0.863623\tval-auc:0.864674\n",
      "[91]\ttrain-auc:0.863629\tval-auc:0.864569\n",
      "[92]\ttrain-auc:0.863786\tval-auc:0.864695\n",
      "[93]\ttrain-auc:0.863901\tval-auc:0.86476\n",
      "[94]\ttrain-auc:0.863993\tval-auc:0.864816\n",
      "[95]\ttrain-auc:0.863748\tval-auc:0.864504\n",
      "[96]\ttrain-auc:0.863728\tval-auc:0.864506\n",
      "[97]\ttrain-auc:0.863775\tval-auc:0.86465\n",
      "[98]\ttrain-auc:0.863894\tval-auc:0.864753\n",
      "[99]\ttrain-auc:0.86395\tval-auc:0.864837\n",
      "[100]\ttrain-auc:0.864407\tval-auc:0.865122\n",
      "[101]\ttrain-auc:0.864436\tval-auc:0.865149\n",
      "[102]\ttrain-auc:0.864479\tval-auc:0.865122\n",
      "[103]\ttrain-auc:0.864565\tval-auc:0.865215\n",
      "[104]\ttrain-auc:0.864474\tval-auc:0.865238\n",
      "[105]\ttrain-auc:0.864417\tval-auc:0.865337\n",
      "[106]\ttrain-auc:0.864414\tval-auc:0.865181\n",
      "[107]\ttrain-auc:0.864425\tval-auc:0.865234\n",
      "[108]\ttrain-auc:0.864442\tval-auc:0.865354\n",
      "[109]\ttrain-auc:0.864381\tval-auc:0.865239\n",
      "[110]\ttrain-auc:0.86437\tval-auc:0.865124\n",
      "[111]\ttrain-auc:0.864491\tval-auc:0.865275\n",
      "[112]\ttrain-auc:0.864559\tval-auc:0.865345\n",
      "[113]\ttrain-auc:0.864591\tval-auc:0.865371\n",
      "[114]\ttrain-auc:0.864648\tval-auc:0.865381\n",
      "[115]\ttrain-auc:0.86474\tval-auc:0.86547\n",
      "[116]\ttrain-auc:0.86474\tval-auc:0.865485\n",
      "[117]\ttrain-auc:0.864711\tval-auc:0.865545\n",
      "[118]\ttrain-auc:0.864757\tval-auc:0.865641\n",
      "[119]\ttrain-auc:0.864791\tval-auc:0.8656\n",
      "[120]\ttrain-auc:0.864838\tval-auc:0.865706\n",
      "[121]\ttrain-auc:0.864582\tval-auc:0.865429\n",
      "[122]\ttrain-auc:0.864679\tval-auc:0.865533\n",
      "[123]\ttrain-auc:0.864648\tval-auc:0.865492\n",
      "[124]\ttrain-auc:0.864641\tval-auc:0.865485\n",
      "[125]\ttrain-auc:0.864686\tval-auc:0.865493\n",
      "[126]\ttrain-auc:0.864582\tval-auc:0.865403\n",
      "[127]\ttrain-auc:0.864484\tval-auc:0.86532\n",
      "[128]\ttrain-auc:0.864595\tval-auc:0.865329\n",
      "[129]\ttrain-auc:0.864613\tval-auc:0.865408\n",
      "[130]\ttrain-auc:0.864711\tval-auc:0.865467\n",
      "[131]\ttrain-auc:0.864546\tval-auc:0.865355\n",
      "[132]\ttrain-auc:0.864441\tval-auc:0.86524\n",
      "[133]\ttrain-auc:0.864726\tval-auc:0.865521\n",
      "[134]\ttrain-auc:0.864739\tval-auc:0.86545\n",
      "[135]\ttrain-auc:0.864482\tval-auc:0.86529\n",
      "[136]\ttrain-auc:0.864558\tval-auc:0.865374\n",
      "[137]\ttrain-auc:0.864672\tval-auc:0.865476\n",
      "[138]\ttrain-auc:0.864738\tval-auc:0.865509\n",
      "[139]\ttrain-auc:0.864692\tval-auc:0.865488\n",
      "[140]\ttrain-auc:0.864836\tval-auc:0.865658\n",
      "[141]\ttrain-auc:0.864874\tval-auc:0.865745\n",
      "[142]\ttrain-auc:0.864905\tval-auc:0.865733\n",
      "[143]\ttrain-auc:0.864945\tval-auc:0.865795\n",
      "[144]\ttrain-auc:0.864955\tval-auc:0.86577\n",
      "[145]\ttrain-auc:0.864904\tval-auc:0.865693\n",
      "[146]\ttrain-auc:0.864946\tval-auc:0.865846\n",
      "[147]\ttrain-auc:0.864758\tval-auc:0.865479\n",
      "[148]\ttrain-auc:0.864773\tval-auc:0.865369\n",
      "[149]\ttrain-auc:0.864667\tval-auc:0.865259\n",
      "[150]\ttrain-auc:0.864632\tval-auc:0.865188\n",
      "[151]\ttrain-auc:0.864707\tval-auc:0.865274\n",
      "[152]\ttrain-auc:0.864763\tval-auc:0.865364\n",
      "[153]\ttrain-auc:0.864799\tval-auc:0.865286\n",
      "[154]\ttrain-auc:0.864771\tval-auc:0.865311\n",
      "[155]\ttrain-auc:0.86478\tval-auc:0.865299\n",
      "[156]\ttrain-auc:0.865019\tval-auc:0.865506\n",
      "[157]\ttrain-auc:0.86506\tval-auc:0.865507\n",
      "[158]\ttrain-auc:0.86499\tval-auc:0.86544\n",
      "[159]\ttrain-auc:0.865016\tval-auc:0.865448\n",
      "[160]\ttrain-auc:0.864775\tval-auc:0.865295\n",
      "[161]\ttrain-auc:0.864751\tval-auc:0.8653\n",
      "[162]\ttrain-auc:0.864769\tval-auc:0.865283\n",
      "[163]\ttrain-auc:0.864848\tval-auc:0.865356\n",
      "[164]\ttrain-auc:0.864825\tval-auc:0.865326\n",
      "[165]\ttrain-auc:0.864883\tval-auc:0.865359\n",
      "[166]\ttrain-auc:0.864855\tval-auc:0.865386\n",
      "[167]\ttrain-auc:0.864888\tval-auc:0.865449\n",
      "[168]\ttrain-auc:0.864903\tval-auc:0.865465\n",
      "[169]\ttrain-auc:0.864887\tval-auc:0.865368\n",
      "[170]\ttrain-auc:0.864794\tval-auc:0.86535\n",
      "[171]\ttrain-auc:0.864836\tval-auc:0.865416\n",
      "[172]\ttrain-auc:0.864916\tval-auc:0.865503\n",
      "[173]\ttrain-auc:0.864882\tval-auc:0.865505\n",
      "[174]\ttrain-auc:0.864757\tval-auc:0.865426\n",
      "[175]\ttrain-auc:0.864758\tval-auc:0.865403\n",
      "[176]\ttrain-auc:0.86482\tval-auc:0.865485\n",
      "[177]\ttrain-auc:0.864779\tval-auc:0.865436\n",
      "[178]\ttrain-auc:0.864757\tval-auc:0.865389\n",
      "[179]\ttrain-auc:0.864735\tval-auc:0.86533\n",
      "[180]\ttrain-auc:0.864767\tval-auc:0.865373\n",
      "[181]\ttrain-auc:0.864823\tval-auc:0.865422\n",
      "[182]\ttrain-auc:0.864735\tval-auc:0.865304\n",
      "[183]\ttrain-auc:0.864725\tval-auc:0.865284\n",
      "[184]\ttrain-auc:0.864696\tval-auc:0.865264\n",
      "[185]\ttrain-auc:0.86457\tval-auc:0.865133\n",
      "[186]\ttrain-auc:0.864507\tval-auc:0.865042\n",
      "[187]\ttrain-auc:0.864507\tval-auc:0.865037\n",
      "[188]\ttrain-auc:0.864432\tval-auc:0.86498\n",
      "[189]\ttrain-auc:0.864383\tval-auc:0.864952\n",
      "[190]\ttrain-auc:0.864423\tval-auc:0.865015\n",
      "[191]\ttrain-auc:0.864417\tval-auc:0.865003\n",
      "[192]\ttrain-auc:0.864464\tval-auc:0.86508\n",
      "[193]\ttrain-auc:0.864446\tval-auc:0.865085\n",
      "[194]\ttrain-auc:0.864382\tval-auc:0.865023\n",
      "[195]\ttrain-auc:0.864365\tval-auc:0.865032\n",
      "[196]\ttrain-auc:0.864405\tval-auc:0.865127\n",
      "[197]\ttrain-auc:0.864425\tval-auc:0.865089\n",
      "[198]\ttrain-auc:0.864405\tval-auc:0.865138\n",
      "[199]\ttrain-auc:0.864395\tval-auc:0.865191\n",
      "[200]\ttrain-auc:0.864382\tval-auc:0.865166\n",
      "[201]\ttrain-auc:0.86445\tval-auc:0.865183\n",
      "[202]\ttrain-auc:0.864502\tval-auc:0.865232\n",
      "[203]\ttrain-auc:0.864518\tval-auc:0.865259\n",
      "[204]\ttrain-auc:0.864508\tval-auc:0.865292\n",
      "[205]\ttrain-auc:0.864573\tval-auc:0.865313\n",
      "[206]\ttrain-auc:0.864584\tval-auc:0.865311\n",
      "[207]\ttrain-auc:0.864574\tval-auc:0.865349\n",
      "[208]\ttrain-auc:0.86454\tval-auc:0.865241\n",
      "[209]\ttrain-auc:0.864539\tval-auc:0.865228\n",
      "[210]\ttrain-auc:0.864561\tval-auc:0.86522\n",
      "[211]\ttrain-auc:0.864561\tval-auc:0.865234\n",
      "[212]\ttrain-auc:0.864575\tval-auc:0.865221\n",
      "[213]\ttrain-auc:0.864652\tval-auc:0.865342\n",
      "[214]\ttrain-auc:0.86458\tval-auc:0.86525\n",
      "[215]\ttrain-auc:0.864608\tval-auc:0.865331\n",
      "[216]\ttrain-auc:0.864627\tval-auc:0.865325\n",
      "[217]\ttrain-auc:0.864589\tval-auc:0.865308\n",
      "[218]\ttrain-auc:0.864605\tval-auc:0.865299\n",
      "[219]\ttrain-auc:0.864603\tval-auc:0.865282\n",
      "[220]\ttrain-auc:0.86474\tval-auc:0.86544\n",
      "[221]\ttrain-auc:0.864695\tval-auc:0.865398\n",
      "[222]\ttrain-auc:0.864636\tval-auc:0.865346\n",
      "[223]\ttrain-auc:0.864632\tval-auc:0.865375\n",
      "[224]\ttrain-auc:0.864526\tval-auc:0.865254\n",
      "[225]\ttrain-auc:0.864662\tval-auc:0.865339\n",
      "[226]\ttrain-auc:0.864617\tval-auc:0.865295\n",
      "[227]\ttrain-auc:0.864617\tval-auc:0.86527\n",
      "[228]\ttrain-auc:0.864639\tval-auc:0.86528\n",
      "[229]\ttrain-auc:0.86474\tval-auc:0.865404\n",
      "[230]\ttrain-auc:0.86466\tval-auc:0.865314\n",
      "[231]\ttrain-auc:0.864672\tval-auc:0.865339\n",
      "[232]\ttrain-auc:0.86468\tval-auc:0.865315\n",
      "[233]\ttrain-auc:0.864776\tval-auc:0.865426\n",
      "[234]\ttrain-auc:0.864746\tval-auc:0.865362\n",
      "[235]\ttrain-auc:0.864652\tval-auc:0.865255\n",
      "[236]\ttrain-auc:0.864647\tval-auc:0.865213\n",
      "[237]\ttrain-auc:0.864634\tval-auc:0.865242\n",
      "[238]\ttrain-auc:0.86461\tval-auc:0.865193\n",
      "[239]\ttrain-auc:0.864608\tval-auc:0.865132\n",
      "[240]\ttrain-auc:0.86458\tval-auc:0.865071\n",
      "[241]\ttrain-auc:0.864534\tval-auc:0.865028\n",
      "[242]\ttrain-auc:0.864548\tval-auc:0.865005\n",
      "[243]\ttrain-auc:0.86451\tval-auc:0.864997\n",
      "[244]\ttrain-auc:0.864422\tval-auc:0.864982\n",
      "[245]\ttrain-auc:0.86432\tval-auc:0.864903\n",
      "[246]\ttrain-auc:0.864281\tval-auc:0.864916\n",
      "Stopping. Best iteration:\n",
      "[146]\ttrain-auc:0.864946\tval-auc:0.865846\n",
      "\n"
     ]
    }
   ],
   "source": [
    "model = xgb.train(params, dtrain, num_boost_round=num_round, evals=watchlist, early_stopping_rounds=100)\n",
    "model.save_model('model/xgb.model')\n",
    "print(\"best best_ntree_limit\", model.best_ntree_limit)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Booster.predict expects an xgb.DMatrix, so score `dtest` (built from test_x) instead of the raw DataFrame.\n",
    "# ntree_limit keeps prediction to the best early-stopped iteration rather than every boosted tree.\n",
    "predict_y = model.predict(dtest, ntree_limit=model.best_ntree_limit)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
